knitr::opts_chunk$set(echo = TRUE)
knitr::opts_chunk$set(message = F)
knitr::opts_chunk$set(warning = F)
### Libraries ----------------------------
# data package
library(aninet) #https://github.com/MNWeiss/aninet
# for plotting
library(ggplot2)
# for creating design matrix
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
# igraph objects + functions
library(igraph)
##
## Attaching package: 'igraph'
## The following object is masked from 'package:tidyr':
##
## crossing
## The following objects are masked from 'package:dplyr':
##
## as_data_frame, groups, union
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
# parallelization is used in simulation
library(foreach)
library(parallel)
# model estimation
library(GLMMadaptive)
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:GLMMadaptive':
##
## negative.binomial
## The following object is masked from 'package:dplyr':
##
## select
# library(glmmTMB) Laplace model fitting algorithm
# library(centiserve) Other centrality statistics
# Other needed Functions
source("funcs/simulation-funcs.R")# function to run simulation
source("funcs/munk-czado-funcs.R") # functions to calculate trimmed mallows distance
source("funcs/whale-plot-funcs.R") # functions to make plots
### Setting up Parallelization ----------------------
n.cores <- parallel::detectCores() -1
my.cluster <- parallel::makeCluster(
n.cores,
type = "PSOCK"
)
my.cluster
## socket cluster with 7 nodes on host 'localhost'
doParallel::registerDoParallel(cl = my.cluster)
paste(("is registered:"), (foreach::getDoParRegistered()))
## [1] "is registered: TRUE"
paste(("workers:"), (foreach::getDoParWorkers()))
## [1] "workers: 7"
### load the data ---------------------
### Bulding Whale graph
gwhale.contact <- graph_from_adjacency_matrix(srkw_contact,
mode = "undirected",
weighted = T)
gwhale.surface <- graph_from_adjacency_matrix(srkw_surfacing,
mode = "undirected",
weighted = T)
n <- gorder(gwhale.contact)
### clusters -------------------------
set.seed(252784)
m0 <- factor(srkw_attributes$matriline, labels = 1:6)
m1 <- cluster_walktrap(gwhale.contact, steps = 3)$membership
m2 <- cluster_leading_eigen(gwhale.contact)$membership
m3 <- cluster_infomap(gwhale.contact, E(gwhale.contact)$weight, modularity = T)$membership
m4 <- cluster_fluid_communities(gwhale.contact, 4)$membership
m5 <- cluster_walktrap(gwhale.surface, steps = 3)$membership
m6 <- cluster_leading_eigen(gwhale.surface)$membership
m7 <- cluster_infomap(gwhale.surface, E(gwhale.surface)$weight, modularity = T)$membership
#cluster_edge_closeness(gwhale.surface, weights = log(E(gwhale.surface)$weight + 1, base = 10))$membership
m8 <- cluster_fluid_communities(gwhale.surface, 4)$membership
### Node Positions for plotting
# Define the string
position.raw <-
"id,age,sex,asc,matriline,X,Y
J40,15,0,af,J14s,4.5,0.5
J45,10,1,im,J14s,3.25,0.59
J31,24,0,af,J11s,3.75,4.5
J35,21,0,af,J17s,0,2.5
J39,16,1,am,J11s,3.5,3.25
J44,10,1,im,J17s,-1,4.5
J53,4,0,im,J17s,0.8,3.75
J56,0,0,im,J11s,4.5,2.5
J47,9,1,im,J17s,-1,3.5
J26,28,1,am,J16s,-2.6,2.25
J37,18,0,af,J14s,2.25,0
J38,16,1,am,J22s,-4.25,0.25
J41,14,0,af,J19s,0,-1.5
J46,10,0,im,J17s,0.25,5
J49,7,1,im,J14s,3.75,-0.25
J42,12,0,af,J16s,-4,2.25
J22,34,0,af,J22s,-3.5,-0.5
J27,28,1,am,J11s,5.25,3.75
J19,40,0,af,J19s,-0.5,-0.5
J51,4,1,im,J19s,-1.75,-1.25
J16,47,0,af,J16s,-5,2.75
J36,20,0,af,J16s,-3.5,3"
# Read the string into a data frame
node_df <- read.csv(textConnection(position.raw))
node_positions <- as.matrix(node_df[, c("X", "Y")])
size = scales::rescale(srkw_attributes$age, c(7,20))
r = range(c(E(gwhale.surface)$weight,E(gwhale.contact)$weight ))
width.surface = (E(gwhale.surface)$weight - min(E(gwhale.surface))) /( r[2] - r[1])
width.surface = width.surface * (10 - 0.5) + 0.5
# color palett for matrilines
palett0 <- c( "#9c179e", "#22a884", "#f0f921", "#fca636" ,"#414487", "#e16462" )
p0 <- palett0[m0]
#funtion to get edge colors
get_egdge_colors <- function(w){
max.w <- max(w)
prop.w <- log(w) / log(max.w)
scaled.prop.w <- prop.w / 2 + 1/2
edge.color <-
sapply(scaled.prop.w,
function(x){adjustcolor( "black", alpha.f = x)})
return(edge.color)
}
plot(gwhale.surface,
layout = node_positions,
vertex.label = V(gwhale.surface)$name,
vertex.label.cex = 1,
vertex.label.color = "black",
vertex.color = p0,
vertex.size = size + 5,
vertex.shape = ifelse(srkw_attributes$sex==0, "circle", "square"),
vertex.frame.color = NA,
edge.width =width.surface,
edge.color =get_egdge_colors(E(gwhale.surface)$weight )
)
#edge widths for plotting
width.contact = (E(gwhale.contact)$weight - min(E(gwhale.contact))) /( r[2] - r[1])
width.contact = width.contact * (10 - 0.5) + 0.5
#assign cluster colors
palett1 <- c("#f0f921", "#f89540", "#e16462", "#0d0887", "#9c179e" )
palett2 <- c( "#9c179e","#0d0887", "#f89540", "#f0f921", "#e16462")
palett3 <- c("#9c179e", "#e16462", "#f0f921" )
#c( "#9c179e", "#fca636", "#0d0887", "#e16462", "#6a00a8", "#f0f921")
palett4 <- c( "#f89540", "#9c179e", "#0d0887", "#f0f921")
p1 <- palett1[m1]
p2 <- palett2[m2]
p3 <- palett3[m3]
p4 <- palett4[m4]
plot(gwhale.contact,
layout = node_positions,
vertex.label = V(gwhale.contact)$name,
vertex.label.cex = 1,
vertex.label.color = "black",
vertex.color = p0,
vertex.size = size + 5,
vertex.shape = ifelse(srkw_attributes$sex==0, "circle", "square"),
vertex.frame.color = NA,
edge.width =width.contact,
edge.color =get_egdge_colors(E(gwhale.contact)$weight )
)
plot(gwhale.contact,
layout = node_positions,
vertex.label = NA,
#vertex.label.color = "black",
vertex.color = p1,
vertex.frame.color = NA,
main= "(A) Walktrap",
edge.width =width.contact,
edge.color = "black")
plot(gwhale.contact,
layout = node_positions,
vertex.label = NA,
#vertex.label.color = "black",
vertex.color = p2,
vertex.frame.color = NA,
main= "(B) Leading Eigenvalue",
edge.width =width.contact,
edge.color = "black")
plot(gwhale.contact,
layout = node_positions,
vertex.label = NA,
#vertex.label.color = "black",
vertex.color = p3,
vertex.frame.color = NA,
main= "(C) Infomap",
edge.width = width.contact,
edge.color = "black")
plot(gwhale.contact,
layout = node_positions,
vertex.label = NA,
#vertex.label.color = "black",
vertex.color = p4,
vertex.frame.color = NA,
main= "(D) Fluid Communities",
edge.width =width.contact,
edge.color = "black")
### Centrality Needed for Simulation - Surfacing -----------------
whale.degree <- degree(gwhale.surface)
whale.strength <- strength(gwhale.surface)
whale.eigen <- eigen_centrality(gwhale.surface)$vector
whale.close <- igraph::closeness(gwhale.surface, normalized = T)
whale.between <- igraph::betweenness(gwhale.surface, normalized = T)
dat.plot.true <- data.frame(degree.sim = whale.degree,
strength.sim = whale.strength,
eigen.sim = whale.eigen,
close.sim = whale.close ,
between.sim = whale.between,
id = 20000)
#### Design Matrix ----------------------------------
mother.calf <- data.frame(
mother = c("J16", "J16", "J16", "J37", "J35", "J22", "J31", "J19", "J41"),
calf = c("J26", "J26", "J42", "J49", "J47", "J38", "J56", "J41", "J51")
)
#seetins up ASC2
srkw_attributes <-
srkw_attributes %>%
dplyr::mutate(ASC2 = case_when(
sex == 0 & age < 7 ~ "IM", #imature
sex == 1 & age < 10 ~ "IM", #imature
sex == 0 & age >=7 & age <38 ~ "RF", #reproductive age female
sex == 1 & age >=10 ~ "RM", #reproductive age male
sex == 0 & age >=38 ~ "PF" #post reproductive age female
)) %>%
dplyr::mutate(RA = case_when(
sex == 0 & age < 7 ~ "IM", #imature
sex == 1 & age < 10 ~ "IM", #imature
sex == 0 & age >=7 & age <38 ~ "R", #reproductive age female
sex == 1 & age >=10 ~ "R", #reproductive age male
sex == 0 & age >=38 ~ "P" #post reproductive age female
))
## Make Design Matrix
design.matrix <- as.data.frame(t(combn(1:n, 2)))
design.matrix[, c("name1", "name2",
"kinship", "is.mother",
"age1", "age2", "agediff",
"sex1", "sex2", "samesex", "sexgroup", "mategroup",
"t1", "t2", "t12", "tdiff",
"surface", "contact",
"m1", "m2",
"ASCi", "ASCj", "mate")] <- NA
for(i in 1:nrow(design.matrix)){
#node numbers
v1 <- design.matrix$V1[i]
v2 <- design.matrix$V2[i]
#node names
n1 <- design.matrix$name1[i] <- V(gwhale.contact)$name[v1]
n2 <- design.matrix$name2[i] <- V(gwhale.contact)$name[v2]
#id
id1 <- srkw_attributes$id==n1
id2 <- srkw_attributes$id==n2
#kinship coefficent
design.matrix$m1[i] <- srkw_attributes$matriline[id1]
design.matrix$m2[i] <- srkw_attributes$matriline[id2]
design.matrix$kinship[i] <- srkw_kinship[n1, n2]
design.matrix$is.mother[i] <- any(apply(mother.calf, 1,
function(row){all(row == c(n1, n2) | all(row == c(n2, n1)))
}))
#age coefficents
design.matrix$age1[i] <- srkw_attributes$age[id1]
design.matrix$age2[i] <- srkw_attributes$age[id2]
design.matrix$agediff[i] <- abs(design.matrix$age1[i] - design.matrix$age2[i] )
#sex coefficent
design.matrix$sex1[i] <- ifelse(srkw_attributes$sex[id1] == 0, "F", "M")
design.matrix$sex2[i] <- ifelse(srkw_attributes$sex[id2] == 0, "F", "M")
design.matrix$samesex[i] <- design.matrix$sex1[i] == design.matrix$sex2[i]
design.matrix$sexgroup[i] <- ifelse(!design.matrix$samesex[i], "N", design.matrix$sex1[i])
#ASC
design.matrix$ASCi[i] <- srkw_attributes$RA[id1]
design.matrix$ASCj[i] <- srkw_attributes$RA[id2]
design.matrix$mate[i] <-
(srkw_attributes$ASC2[id1] == "RF" & srkw_attributes$ASC2[id2] == "RM") |
(srkw_attributes$ASC2[id1] == "RM" & srkw_attributes$ASC2[id2] == "RF")
design.matrix$mategroup[i] <-
case_when(
design.matrix$mate[i] == TRUE ~ "RA",
design.matrix$sexgroup[i] == "F" ~ "F",
design.matrix$sexgroup[i] == "M" ~ "M" ,
design.matrix$samesex[i] == F & any(srkw_attributes$RA[id1|id2] == "IM") ~ "NO",
design.matrix$samesex[i] == F & any(srkw_attributes$RA[id1|id2] == "P") ~ "NO",
)
#Time of Filming
design.matrix$t1[i] <- srkw_sampling[n1, n1]
design.matrix$t2[i] <- srkw_sampling[n2, n2]
design.matrix$t12[i] <- srkw_sampling[n1, n2]
design.matrix$tdiff[i] <- design.matrix$t1[i] + design.matrix$t2[i] - design.matrix$t12[i]
#response variables
design.matrix$contact[i] <- srkw_contact[n1, n2]
design.matrix$surface[i] <- srkw_surfacing[n1, n2]
}
design.matrix$is.mother <- as.factor(design.matrix$is.mother)
design.matrix$mategroup <- as.factor(design.matrix$mategroup)
design.matrix$samesex <- as.factor(design.matrix$samesex)
## graph summary info
sum(design.matrix$surface == 0) / nrow(design.matrix) * 100
## [1] 37.66234
mean(design.matrix$surface[design.matrix$surface != 0])
## [1] 11.22917
max(design.matrix$surface[design.matrix$surface != 0])
## [1] 162
sum(design.matrix$contact == 0) / nrow(design.matrix) * 100
## [1] 61.47186
mean(design.matrix$contact[design.matrix$contact != 0])
## [1] 9.337079
max(design.matrix$contact[design.matrix$contact != 0])
## [1] 70
### Running The Simulation ------------------------
n.models <- 2
model_results_surface <- vector(mode = "list", length = n.models)
simulation_results_surface <- vector(mode = "list", length = n.models)
mallow_results_surface <- vector(mode = "list", length = n.models)
set.seed(94353)
for(p in 1:n.models){
#define cluster membership
membership <- m0
#assign groups, group1 - group2 is the same as group2 - group1 because network is undirected
design.matrix$group <- NA
for(i in 1:nrow(design.matrix)){
mm1 <- membership[srkw_attributes$id == design.matrix$name1[i]]
mm2 <- membership[srkw_attributes$id == design.matrix$name2[i]]
# design.matrix$group[i] <- mm1 == mm2
# design.matrix$group[i] <- (as.numeric(as.character(mm1)) * (mm1 == mm2))
design.matrix$group[i] <- paste0(sort(c(mm1, mm2)), collapse = "")
}
design.matrix$group <- as.factor(design.matrix$group)
#################################
### Modeling
#################################
### GLMADAPTIVE
# ZIP with Random Effects
# removing kinship from the zero inflated part because the hessian is uninvertible.
n_groups <- length(levels(design.matrix$group) )
#levels(design.matrix$group) <- LETTERS[1:10]
#design.matrix$group
if(p == 1){
mod_zir <-
GLMMadaptive::mixed_model(
fixed = surface ~ mategroup*agediff+ is.mother*agediff + log(tdiff + 1),
random = ~ 1| as.factor(group),
data = design.matrix,
family = GLMMadaptive::zi.poisson(),
zi_fixed = surface ~ mategroup * agediff +log(tdiff + 1) , #no is mother because no 0's in surfacing
zi_random = ~ 1 | as.factor(group),
max_coef_value = 10000)
}else if(p==2){
mod_zir <-
GLMMadaptive::mixed_model(
fixed = surface ~ samesex*agediff+ log(tdiff + 1),
random = ~ 1| as.factor(group),
data = design.matrix,
family = GLMMadaptive::zi.poisson(),
zi_fixed = surface ~ samesex * agediff+log(tdiff + 1) , #no is mother because no 0's in surfacing
zi_random = ~ 1 | as.factor(group),
max_coef_value = 10000)
}
model_results_surface[[p]] <- mod_zir
#######################################
### Running the simulations
#######################################
NSIM = 10000
# 3. ZIP with Random Effects 1
simulation_results_surface[[p]] <- sim_results(mod_zir , design.matrix, NSIM)
mallow_results_surface[[p]] <- mallow_test(simulation_results_surface[[p]])
}
summary(model_results_surface[[1]])
##
## Call:
## GLMMadaptive::mixed_model(fixed = surface ~ mategroup * agediff +
## is.mother * agediff + log(tdiff + 1), random = ~1 | as.factor(group),
## data = design.matrix, family = GLMMadaptive::zi.poisson(),
## zi_fixed = surface ~ mategroup * agediff + log(tdiff + 1),
## zi_random = ~1 | as.factor(group), max_coef_value = 10000)
##
## Data Descriptives:
## Number of Observations: 231
## Number of Groups: 21
##
## Model:
## family: zero-inflated poisson
## link: log
##
## Fit statistics:
## log.Lik AIC BIC
## -543.8293 1133.659 1157.683
##
## Random effects covariance matrix:
## StdDev Corr
## (Intercept) 0.8841
## zi_(Intercept) 2.1994 -0.3988
##
## Fixed effects:
## Estimate Std.Err z-value p-value
## (Intercept) -13.5168 0.9157 -14.7614 < 1e-04
## mategroupM -0.1622 0.1567 -1.0356 0.30040683
## mategroupNO -0.3538 0.1273 -2.7780 0.00546888
## mategroupRA -0.5706 0.1495 -3.8160 0.00013566
## agediff -0.0218 0.0077 -2.8222 0.00477026
## is.motherTRUE 1.2644 0.2402 5.2641 < 1e-04
## log(tdiff + 1) 1.9996 0.1099 18.1972 < 1e-04
## mategroupM:agediff -0.0146 0.0207 -0.7052 0.48069988
## mategroupNO:agediff 0.0277 0.0096 2.8843 0.00392254
## mategroupRA:agediff 0.0446 0.0160 2.7848 0.00535690
## agediff:is.motherTRUE -0.0356 0.0148 -2.4094 0.01597711
##
## Zero-part coefficients:
## Estimate Std.Err z-value p-value
## (Intercept) 4.3918 7.4296 0.5911 0.554442
## mategroupM -15.5561 15.6374 -0.9948 0.319832
## mategroupNO 1.7154 2.2046 0.7781 0.436508
## mategroupRA 0.8945 2.6566 0.3367 0.736328
## agediff 0.1505 0.0863 1.7431 0.081322
## log(tdiff + 1) -1.4325 0.9959 -1.4384 0.150308
## mategroupM:agediff 0.7468 0.7670 0.9737 0.330216
## mategroupNO:agediff -0.0624 0.0995 -0.6271 0.530583
## mategroupRA:agediff 0.0249 0.1575 0.1584 0.874157
##
## Integration:
## method: adaptive Gauss-Hermite quadrature rule
## quadrature points: 11
##
## Optimization:
## method: hybrid EM and quasi-Newton
## converged: TRUE
marginal_coefs(model_results_surface[[1]], std_errors = TRUE)
## Estimate Std.Err z-value p-value
## (Intercept) -15.9523 2.6969 -5.9151 < 1e-04
## mategroupM -0.4350 1.8268 -0.2381 0.811784
## mategroupNO -0.8144 0.3834 -2.1243 0.033642
## mategroupRA -0.7959 0.3626 -2.1948 0.028181
## agediff -0.0547 0.0223 -2.4549 0.014092
## is.motherTRUE 0.7092 0.5278 1.3437 0.179033
## log(tdiff + 1) 2.4178 0.3651 6.6223 < 1e-04
## mategroupM:agediff 0.0154 0.0983 0.1562 0.875885
## mategroupNO:agediff 0.0556 0.0287 1.9386 0.052545
## mategroupRA:agediff 0.0610 0.0318 1.9179 0.055122
## agediff:is.motherTRUE -0.0066 0.0266 -0.2461 0.805622
summary(model_results_surface[[2]])
##
## Call:
## GLMMadaptive::mixed_model(fixed = surface ~ samesex * agediff +
## log(tdiff + 1), random = ~1 | as.factor(group), data = design.matrix,
## family = GLMMadaptive::zi.poisson(), zi_fixed = surface ~
## samesex * agediff + log(tdiff + 1), zi_random = ~1 |
## as.factor(group), max_coef_value = 10000)
##
## Data Descriptives:
## Number of Observations: 231
## Number of Groups: 21
##
## Model:
## family: zero-inflated poisson
## link: log
##
## Fit statistics:
## log.Lik AIC BIC
## -576.3204 1178.641 1192.22
##
## Random effects covariance matrix:
## StdDev Corr
## (Intercept) 1.0912
## zi_(Intercept) 1.8554 -0.5249
##
## Fixed effects:
## Estimate Std.Err z-value p-value
## (Intercept) -16.6436 0.7931 -20.9846 < 1e-04
## samesexTRUE 0.1809 0.0908 1.9912 0.04645514
## agediff 0.0131 0.0053 2.4993 0.01244538
## log(tdiff + 1) 2.3606 0.0961 24.5751 < 1e-04
## samesexTRUE:agediff -0.0269 0.0072 -3.7566 0.00017222
##
## Zero-part coefficients:
## Estimate Std.Err z-value p-value
## (Intercept) 4.2997 6.4822 0.6633 0.507130
## samesexTRUE -2.7027 2.5636 -1.0542 0.291777
## agediff 0.0941 0.0571 1.6478 0.099401
## log(tdiff + 1) -1.1561 0.8871 -1.3032 0.192508
## samesexTRUE:agediff 0.0882 0.1073 0.8219 0.411121
##
## Integration:
## method: adaptive Gauss-Hermite quadrature rule
## quadrature points: 11
##
## Optimization:
## method: hybrid EM and quasi-Newton
## converged: TRUE
marginal_coefs(model_results_surface[[2]], std_errors = TRUE)
## Estimate Std.Err z-value p-value
## (Intercept) -18.3357 2.3727 -7.7278 < 1e-04
## samesexTRUE 0.4218 0.2841 1.4845 0.13768
## agediff 0.0030 0.0141 0.2169 0.82827
## log(tdiff + 1) 2.6669 0.3098 8.6079 < 1e-04
## samesexTRUE:agediff -0.0417 0.0184 -2.2693 0.02325
lapply(model_results_surface, BIC)
## [[1]]
## [1] 1157.683
##
## [[2]]
## [1] 1192.22
lapply(model_results_surface, AIC)
## [[1]]
## [1] 1133.659
##
## [[2]]
## [1] 1178.641
post.modes1 <- model_results_surface[[1]]$post_modes
post.modes2 <- model_results_surface[[2]]$post_modes
post.modes <- cbind(post.modes1, post.modes2)
colnames(post.modes) <- c("Counts1", "Zeros1", "Counts2", "Zeros2")
post.modes <-
post.modes %>%
as.data.frame() %>%
mutate_all(round, 3)
matrilines <- c("J11s" , "J14s", "J16s", "J17s", "J19s", "J22s")
post.modes <-
unique(t(apply(expand.grid(matrilines, matrilines), 1, sort))) %>%
as.data.frame() %>%
mutate(names = paste0(V1, "-", V2)) %>%
cbind(post.modes) %>%
dplyr::select(-c(V1, V2))
post.modes
## names Counts1 Zeros1 Counts2 Zeros2
## 11 J11s-J11s 0.217 -0.492 -0.012 -0.323
## 12 J11s-J14s -0.161 4.172 -0.468 3.258
## 13 J11s-J16s 1.385 -2.994 1.595 -2.428
## 14 J11s-J17s -0.594 0.791 -0.853 0.887
## 15 J11s-J19s -0.351 -1.101 -0.556 -0.678
## 16 J11s-J22s -1.930 1.643 -2.119 1.731
## 22 J14s-J14s 0.044 -0.150 0.129 -0.223
## 23 J14s-J16s 1.515 0.103 1.838 -0.127
## 24 J14s-J17s -0.604 0.731 -0.837 0.593
## 25 J14s-J19s -0.071 2.018 -0.304 1.531
## 26 J14s-J22s -0.255 1.370 -0.486 1.250
## 33 J16s-J16s 1.387 -1.459 1.722 -1.795
## 34 J16s-J17s 0.924 -0.697 1.415 -1.128
## 35 J16s-J19s 0.777 -0.709 1.244 -1.020
## 36 J16s-J22s 0.478 -0.742 0.772 -0.875
## 44 J17s-J17s -0.132 -0.001 -0.429 0.219
## 45 J17s-J19s -0.758 3.084 -0.939 2.537
## 46 J17s-J22s -1.323 0.020 -1.490 0.365
## 55 J19s-J19s 0.094 -0.497 0.414 -0.704
## 56 J19s-J22s -0.441 1.846 -0.662 1.375
## 66 J22s-J22s 0.148 -0.255 0.524 -0.511
# From GLMMadaptive vignettes
set.seed(28397)
## Model 1
par(mar = c(2.5, 2.5, 0, 0), mgp = c(1.1, 0.5, 0), cex.axis = 0.7, cex.lab = 0.8)
y <- design.matrix$surface
y[y > 0] <- log(y[y > 0])
x_vals <- seq(min(y), max(y), length.out = 500)
out <- simulate(model_results_surface[[1]], nsim = 100, acount_MLEs_var = TRUE)
ind <- out > sqrt(.Machine$double.eps)
out[ind] <- log(out[ind])
rep_y <- apply(out, 2, function (x, x_vals) ecdf(x)(x_vals), x_vals = x_vals)
matplot(x_vals, rep_y, type = "l", lty = 1, col = "lightgrey",
xlab = "log(Response)", ylab = "Empirical CDF")
lines(x_vals, ecdf(y)(x_vals))
legend("bottomright", c("Simulated data ", "Observed data "), lty = 1,
col = c("lightgrey", "black"), bty = "n", cex = 0.8)
set.seed(783495)
### Model 2
par(mar = c(2.5, 2.5, 0, 0), mgp = c(1.1, 0.5, 0), cex.axis = 0.7, cex.lab = 0.8)
y <- design.matrix$surface
y[y > 0] <- log(y[y > 0])
x_vals <- seq(min(y), max(y), length.out = 500)
out <- simulate(model_results_surface[[2]], nsim = 100, acount_MLEs_var = TRUE)
ind <- out > sqrt(.Machine$double.eps)
out[ind] <- log(out[ind])
rep_y <- apply(out, 2, function (x, x_vals) ecdf(x)(x_vals), x_vals = x_vals)
matplot(x_vals, rep_y, type = "l", lty = 1, col = "lightgrey",
xlab = "log(Response)", ylab = "Empirical CDF")
lines(x_vals, ecdf(y)(x_vals))
legend("bottomright", c("Simulated data ", "Observed data "), lty = 1,
col = c("lightgrey", "black"), bty = "n", cex = 0.8)
###Plotting Zero Inflation factors -----------------------
# Manually predict the zero-inflation probabilities
zi_predictions <-
GLMMadaptive::predict(
model_results_surface[[1]],
design.matrix,
type = "zero_part" )
# Create a data frame with predicted values and the original data
predicted_df <- data.frame(
group = design.matrix$group,
surface = design.matrix$surface,
samesex = design.matrix$samesex,
zi_prob = zi_predictions
)
# Use ggplot to visualize
## Zero Probabilities
set.seed(9348724)
predicted_df %>%
mutate(is.zero = surface == 0) %>%
filter(grepl("2", group)) %>%
ggplot(aes(x = group, y = zi_prob, color = is.zero)) +
geom_jitter(width = 0.15, height = 0) +
labs(title = "Predicted Zero-Inflation Probabilities",
subtitle = "Matrilineal Line J14's",
y = "Zero-Inflation Probability", x = "Matrilineal Line") +
scale_x_discrete(labels = c("J11s", "J14s", "J16s", "J17s", "J19s", "J22s")) +
scale_color_manual(values = c("#F8766D", "#619CFF"),
name = "True Observation",
labels = c("Count", "Zero")) +
theme_minimal()
### Estimated Zero Inflation Probabilities
# "outlier" of (9, 0.42) is J16-J42 which are mother-and-daughter (35 year age diff)
# "outlier" of (9, 0.12) is J19-J51 which are grandmother-and-grandson
# their physical attributes (fixed effects) suggest they may not surface together often
# but their familial relationship (random effects) suggests they do surface often
# only fixed effects are shown in this plot
predicted_df %>%
mutate(is.zero = surface == 0) %>%
ggplot(aes(x = surface, y = zi_prob)) +
geom_point() +
xlim(0, 25) +
theme_minimal() +
xlab("Observed Surfacings") +
ylab("Zero Inflation Probabilty Estimate")
### Maximum ZI prob over 25
predicted_df %>%
mutate(is.zero = surface == 0) %>%
filter(surface > 25) %>%
arrange(zi_prob) %>%
tail(1)
## group surface samesex zi_prob is.zero
## 181 66 35 FALSE 0.03403324 FALSE
predicted_df %>%
mutate(is.zero = surface == 0) %>%
left_join(
post.modes %>%
dplyr::select(names, Zeros1)%>%
mutate(group = factor(names, labels = levels(design.matrix$group)))
) %>%
mutate(step1 = LaplacesDemon::logit(zi_prob)) %>%
mutate(step2 = step1 + Zeros1) %>%
mutate(step3 = LaplacesDemon::invlogit(step2)) %>%
ggplot(aes(x = surface, y = step3)) +
geom_point() +
xlim(0, 25) +
theme_minimal() +
ggtitle("ZI Probs with Random Effects")+
xlab("Observed Surfacings") +
ylab("Zero Inflation Probabilty Estimate")
#Verify if the predictions make sense
# design.matrix %>%
# dplyr::select(group, surface) %>%
# group_by(group) %>%
# summarise(zeros = sum(surface == 0),
# count = n()) %>%
# filter(grepl("2", group))
# design.matrix %>%
# dplyr::select(group, surface) %>%
# group_by(group) %>%
# filter(surface>0) %>%
# summarise(nzavg = mean(surface),
# count = n()) %>%
# filter(grepl("1", group))
#
# table(srkw_attributes$matriline)
#
# design.matrix %>%
# dplyr::select(group, surface) %>%
# filter(surface>0) %>%
# filter(grepl("1", group)) %>%
# ggplot(aes(x = group, y = surface)) +
# geom_point()
#
# design.matrix %>%
# dplyr::select(group, surface) %>%
# filter(grepl("1", group)) %>%
# ggplot(aes(x = surface)) +
# geom_histogram() +
# facet_wrap(~ group)
###Plotting Zero Inflation factors -----------------------
# Manually predict the zero-inflation probabilities
zi_predictions <-
GLMMadaptive::predict(
model_results_surface[[2]],
design.matrix,
type = "zero_part" )
# Create a data frame with predicted values and the original data
predicted_df <- data.frame(
group = design.matrix$group,
surface = design.matrix$surface,
samesex = design.matrix$samesex,
zi_prob = zi_predictions
)
# Use ggplot to visualize
## Zero Probabilities
set.seed(4545)
predicted_df %>%
mutate(is.zero = surface == 0) %>%
filter(grepl("2", group)) %>%
ggplot(aes(x = group, y = zi_prob, color = is.zero)) +
geom_jitter(width = 0.15, height = 0) +
labs(title = "Predicted Zero-Inflation Probabilities",
subtitle = "Matrilineal Line J14's",
y = "Zero-Inflation Probability", x = "Matrilineal Line") +
scale_x_discrete(labels = c("J11s", "J14s", "J16s", "J17s", "J19s", "J22s")) +
scale_color_manual(values = c("#F8766D", "#619CFF"),
name = "True Observation",
labels = c("Count", "Zero")) +
theme_minimal()
### Estimated Zero Inflation Probabilities
# "outlier" of (9, 0.48) is J16-J42 which are mother-and-daughter (35 year age diff)
# "outlier" of (9, 0.19) is J19-J51 which are grandmother-and-grandson (36 year age diff)
# their physical attributes (fixed effects) suggest they may not surface together often
# but their familial relationship (random effects) suggests they do surface often
# only fixed effects are shown in this plot
predicted_df %>%
mutate(is.zero = surface == 0) %>%
ggplot(aes(x = surface, y = zi_prob)) +
geom_point() +
xlim(0, 25) +
theme_minimal() +
xlab("Observed Surfacings") +
ylab("Zero Inflation Probabilty Estimate")
### Maximum ZI prob over 25
predicted_df %>%
mutate(is.zero = surface == 0) %>%
filter(surface > 25) %>%
arrange(zi_prob) %>%
tail(1)
## group surface samesex zi_prob is.zero
## 181 66 35 FALSE 0.02873409 FALSE
#
#
# #Verify if the predictions make sense
# design.matrix %>%
# dplyr::select(group, surface) %>%
# group_by(group) %>%
# summarise(zeros = sum(surface == 0),
# count = n()) %>%
# filter(grepl("2", group))
#
# design.matrix %>%
# dplyr::select(group, surface) %>%
# group_by(group) %>%
# filter(surface>0) %>%
# summarise(nzavg = mean(surface),
# count = n()) %>%
# filter(grepl("1", group))
#
# table(srkw_attributes$matriline)
#
# design.matrix %>%
# dplyr::select(group, surface) %>%
# filter(surface>0) %>%
# filter(grepl("1", group)) %>%
# ggplot(aes(x = group, y = surface)) +
# geom_point()
#
# design.matrix %>%
# dplyr::select(group, surface) %>%
# filter(grepl("1", group)) %>%
# ggplot(aes(x = surface)) +
# geom_histogram() +
# facet_wrap(~ group)
For all predicted values, if \(\hat{y}_{ij} < 0.5\), then \(\hat{y}_{ij}\) is considered a prediced zero.
subject_predictions <- GLMMadaptive::predict(
model_results_surface[[1]],
design.matrix,
type = "subject_specific" )
subject_predictions_df <- data.frame(
group = design.matrix$group,
surface = design.matrix$surface,
samesex = design.matrix$samesex,
pred = subject_predictions
)
# number of predicted zeros vs true zeros
subject_predictions_df %>%
mutate(predict.zero = pred < 0.5) %>%
group_by(group) %>%
summarise(zi_predictions = sum(predict.zero),
zi_true = sum(surface==0),
count = n()) %>%
filter(grepl("2", group))
## # A tibble: 6 × 4
## group zi_predictions zi_true count
## <fct> <int> <int> <int>
## 1 12 2 8 16
## 2 22 0 0 6
## 3 23 8 10 16
## 4 24 0 1 20
## 5 25 0 2 12
## 6 26 0 1 8
subject_predictions_df <-
subject_predictions_df %>%
mutate(predict.zero = pred < 0.5) %>%
mutate(is.zero = surface ==0) %>%
mutate(error = surface - pred) %>%
mutate(rel_error = error / pred )
## Plot for predicted coutns
set.seed(989)
subject_predictions_df %>%
filter(grepl("2", group)) %>%
filter(!predict.zero) %>%
ggplot(aes(x = group, y = pred, color = is.zero)) +
geom_jitter(width = 0.15, height = 0) +
labs(title = "Predicted Counts - Subject Specific",
subtitle = "With Matrilineal Line J14's",
y = "Counts of Surfacing", x = "Matrilineal Line") +
scale_x_discrete(labels = c("J11s", "J14s", "J16s", "J17s", "J19s", "J22s")) +
scale_color_manual(values = c("#F8766D", "#619CFF"),
name = "True Observation",
labels = c("Count", "Zero")) +
theme_minimal()#+theme(panel.grid.major = element_line(color = "darkgray"))
## Error plot
subject_predictions_df %>%
ggplot(aes(x = surface, y = error)) +
geom_point() +
facet_wrap(~is.zero, scales = "free" )+
xlab("True Observation") +
ylab("Relative Error") +
ggtitle("Relative Error Vs True Observation") +
scale_color_manual(values = c("#F8766D", "#619CFF"),
name = "True Observation",
labels = c("Count", "Zero"))
subject_predictions_df %>%
filter(!is.zero) %>%
ggplot(aes(x = surface, y = error)) +
geom_point() +
#facet_wrap(~is.zero, scales = "free" )+
xlab("True Observation") +
ylab("Error") +
ggtitle("Error Vs True Counts Observation") +
scale_color_manual(values = c("#F8766D", "#619CFF"),
name = "True Observation",
labels = c("Count", "Zero"))
subject_predictions_df %>%
filter(pred >= 0.5) %>%
ggplot(aes(x = surface, y = rel_error)) +
geom_point() +
#facet_wrap(~is.zero, scales = "free" )+
xlab("True Observation") +
ylab("Relative Error") +
ggtitle("Relative of Counts Predictions Vs True Observation") +
scale_color_manual(values = c("#F8766D", "#619CFF"),
name = "True Observation",
labels = c("Count", "Zero"))
subject_predictions_df %>%
filter(is.zero) %>%
ggplot(aes(x = surface, y = abs(error))) +
geom_boxplot(coeff = 1000) +
#facet_wrap(~is.zero, scales = "free" )+
xlab("True Observation") +
ylab("Absolute Error") +
ggtitle("Absolute Error Vs True Observation") +
scale_color_manual(values = c("#F8766D", "#619CFF"),
name = "True Observation",
labels = c("Count", "Zero")) +
theme_minimal()
### Error for the true counts
subject_predictions_df %>%
filter(!is.zero) %>%
ggplot(aes(x = surface, y = error)) +
geom_point() +
#facet_wrap(~is.zero, scales = "free" )+
xlab("Observed Counts") +
ylab("Error") +
ggtitle("Error vs Observed Counts") +
xlim(0, 75) + # removes one point at (162, -11)
theme_minimal()
# Error for the True zeros
subject_predictions_df %>%
filter(is.zero) %>%
ggplot(aes( x = abs(error))) +
geom_histogram(bins = 10) +
#facet_wrap(~is.zero, scales = "free" )+
ylab("Frequency") +
xlab("Absolute Error") +
ggtitle("Distribution of Absolute Errors for Observed Zeros") +
theme_minimal()
subject_predictions <- GLMMadaptive::predict(
model_results_surface[[2]],
design.matrix,
type = "subject_specific" )
subject_predictions_df <- data.frame(
group = design.matrix$group,
is.mother = design.matrix$is.mother,
surface = design.matrix$surface,
samesex = design.matrix$samesex,
pred = subject_predictions
)
# number of predicted zeros vs true zeros
subject_predictions_df %>%
mutate(predict.zero = pred < 0.5) %>%
group_by(group) %>%
summarise(zi_predictions = sum(predict.zero),
zi_true = sum(surface==0),
count = n()) %>%
filter(grepl("2", group))
## # A tibble: 6 × 4
## group zi_predictions zi_true count
## <fct> <int> <int> <int>
## 1 12 2 8 16
## 2 22 0 0 6
## 3 23 11 10 16
## 4 24 0 1 20
## 5 25 0 2 12
## 6 26 0 1 8
subject_predictions_df <-
subject_predictions_df %>%
mutate(predict.zero = pred < 0.5) %>%
mutate(is.zero = surface ==0) %>%
mutate(error = surface - pred) %>%
mutate(rel_error = error / pred )
set.seed(989)
subject_predictions_df %>%
mutate(predict.zero = pred < 0.5) %>%
mutate(is.zero = surface ==0) %>%
filter(grepl("2", group)) %>%
ggplot(aes(x = group, y = pred, color = is.zero)) +
geom_jitter(width = 0.15, height = 0) +
labs(title = "Predicted Response - Subject Specific",
subtitle = "Matrilineal Line J14's",
y = "Counts of Surfacing", x = "Matrilineal Line") +
scale_x_discrete(labels = c("J11s", "J14s", "J16s", "J17s", "J19s", "J22s")) +
scale_color_manual(values = c("#F8766D", "#619CFF"),
name = "True Observation",
labels = c("Count", "Zero")) +
theme_minimal()+
theme(panel.grid.major = element_line(color = "darkgray"))
subject_predictions_df %>%
mutate(error = surface - pred) %>%
mutate(rel_error = error / pred ) %>%
mutate(is.zero = surface ==0) %>%
filter(!is.zero) %>%
ggplot(aes(x = surface, y = error)) +
geom_point() +
#facet_wrap(~is.zero, scales = "free" )+
xlab("True Observation") +
ylab("Error") +
ggtitle("Error Vs True Counts Observation") +
scale_color_manual(values = c("#F8766D", "#619CFF"),
name = "True Observation",
labels = c("Count", "Zero"))
subject_predictions_df %>%
mutate(error = surface - pred) %>%
mutate(rel_error = error / pred ) %>%
mutate(is.zero = surface ==0) %>%
filter(pred >= 0.5) %>%
ggplot(aes(x = surface, y = rel_error)) +
geom_point() +
#facet_wrap(~is.zero, scales = "free" )+
xlab("True Observation") +
ylab("Relative Error") +
ggtitle("Relative of Counts Predictions Vs True Observation") +
scale_color_manual(values = c("#F8766D", "#619CFF"),
name = "True Observation",
labels = c("Count", "Zero"))
subject_predictions_df %>%
mutate(error = surface - pred) %>%
mutate(rel_error = error / pred ) %>%
mutate(is.zero = surface ==0) %>%
filter(is.zero) %>%
ggplot(aes(x = surface, y = abs(error))) +
geom_boxplot(coeff = 1000) +
#facet_wrap(~is.zero, scales = "free" )+
xlab("True Observation") +
ylab("Absolute Error") +
ggtitle("Absolute Error Vs True Observation") +
scale_color_manual(values = c("#F8766D", "#619CFF"),
name = "True Observation",
labels = c("Count", "Zero")) +
theme_minimal()
### Error for the true counts
subject_predictions_df %>%
filter(!is.zero) %>%
ggplot(aes(x = surface, y = error)) +
geom_point() +
xlab("Observed Count") +
ylab("Error") +
ggtitle("Error vs Observed Counts") +
xlim(0, 75) + # removes one point at (162, -4.3)
theme_minimal()
# Error for the True zeros
subject_predictions_df %>%
filter(is.zero) %>%
ggplot(aes( x = abs(error))) +
geom_histogram(bins = 10) +
ylab("Frequency") +
xlab("Absolute Error") +
ggtitle("Distribution of Absolute Errors for Observed Zeros") +
theme_minimal()
#### ECDF Plots ----------------------
dist_plots_surface <- plot_ecdf(simulation_results_surface)
egg::ggarrange(plots = dist_plots_surface$eigen,
nrow = 1, ncol = n.models)
egg::ggarrange(plots = dist_plots_surface$close,
nrow = 1, ncol = n.models)
wass_plots_testing <- plot_wass_density_surface(mallow_results_surface)
wass_plots_testing[[1]]
wass_plots_testing[[2]]
# Interaction plots --------------------------
### Model 2
nDF2 <- with(design.matrix,
expand.grid(agediff = seq(min(agediff), 40, length.out = 30),
samesex = levels(samesex),
tdiff = quantile(tdiff, c(0.25, 0.5, 0.75)),
group = levels(group)))
nDF2$surface <- predict(model_results_surface[[2]], nDF2)
marginal_coefs(model_results_surface[[2]], std_errors = TRUE)
## Estimate Std.Err z-value p-value
## (Intercept) -18.3357 2.3727 -7.7278 < 1e-04
## samesexTRUE 0.4218 0.2841 1.4845 0.13768
## agediff 0.0030 0.0141 0.2169 0.82827
## log(tdiff + 1) 2.6669 0.3098 8.6079 < 1e-04
## samesexTRUE:agediff -0.0417 0.0184 -2.2693 0.02325
#model_results_surface[[2]]$model_frames
#fitted(model_results_surface[[2]], type = "subject_specific")
nDF2 <- with(design.matrix,
expand.grid(agediff = seq(min(agediff), 40, length.out = 30),
samesex = levels(samesex),
tdiff = quantile(tdiff, c(0.4, 0.6, 0.8)),
group = levels(group)))
# get marginal effects
nMM2 <- model.matrix(~ samesex*agediff + log(tdiff+1), data = nDF2)
marg.counts <- marginal_coefs(model_results_surface[[2]], std_errors = T)
betac <- marginal_coefs(model_results_surface[[2]])# (as.matrix(model_results_surface[[2]]$coefficients))
betac <- as.matrix(betac$betas)
head(nMM2)
## (Intercept) samesexTRUE agediff log(tdiff + 1) samesexTRUE:agediff
## 1 1 0 0.000000 7.534956 0
## 2 1 0 1.379310 7.534956 0
## 3 1 0 2.758621 7.534956 0
## 4 1 0 4.137931 7.534956 0
## 5 1 0 5.517241 7.534956 0
## 6 1 0 6.896552 7.534956 0
nDF2$pred <- nMM2 %*% betac
sex.labs <- c("Opposite Sexes", "Same Sex")
names(sex.labs) <- c(FALSE, TRUE)
ggplot(nDF2, aes(x = agediff, color = as.factor(tdiff))) +
geom_line(aes(y = exp(pred))) +
facet_wrap(~samesex,
labeller = labeller(samesex = sex.labs)) +
scale_color_manual(values = c("#F8766D", "#00BA38", "#619CFF"),
labels = c("0.4", "0.6", "0.8"),
name = "Quantile of Film \nCo-Occurrence") +
xlab("Age Difference") +
ylab("E(Count)") +
ggtitle("Expected Marginal Effects for the Surfacing Counts") +
theme_minimal() +
theme(axis.line.x = element_line())+
scale_y_continuous(expand=c(0,1.5)) +
scale_x_continuous(expand=c(0,0.5)) +
geom_vline(xintercept=0)
# store centrality statistics for plotting later
whale.degree <- degree(gwhale.contact)
whale.strength <- strength(gwhale.contact)
whale.eigen <- eigen_centrality(gwhale.contact)$vector
whale.close <- closeness(gwhale.contact, normalized = T)
whale.between <- betweenness(gwhale.contact, normalized = T)
dat.plot.true <- data.frame(degree.sim = whale.degree,
strength.sim = whale.strength,
eigen.sim =whale.eigen,
between.sim = whale.between,
close.sim = whale.close,
id = 20000)
set.seed(43453)
model_results <- vector(mode = "list", length = 4)
simulation_results <- vector(mode = "list", length = 4)
mallow_results <- vector(mode = "list", length = 4)
set.seed(9941429)
for(p in 1:4){
# make design matrix
design.matrix <- as.data.frame(t(combn(1:n, 2)))
#define cluster membership
membership <-
case_when(
p == 1 ~ m1,
p == 2 ~ m2,
p == 3 ~ m3,
p == 4 ~ m4
)
design.matrix[, c("name1", "name2",
"kinship",
"age1", "age2", "agediff",
"sex1", "sex2", "samesex",
"t1", "t2", "t12", "tdiff",
"surface", "contact",
"m1", "m2")] <- NA
for(i in 1:nrow(design.matrix)){
v1 <- design.matrix$V1[i]
v2 <- design.matrix$V2[i]
n1 <- design.matrix$name1[i] <- V(gwhale.contact)$name[v1]
n2 <- design.matrix$name2[i] <- V(gwhale.contact)$name[v2]
design.matrix$m1[i] <- membership[v1]
design.matrix$m2[i] <- membership[v2]
design.matrix$kinship[i] <- srkw_kinship[n1, n2]
design.matrix$age1[i] <- srkw_attributes$age[srkw_attributes$id == n1]
design.matrix$age2[i] <- srkw_attributes$age[srkw_attributes$id == n2]
design.matrix$agediff[i] <- abs(design.matrix$age1[i] - design.matrix$age2[i] )
design.matrix$sex1[i] <- srkw_attributes$sex[srkw_attributes$id == n1]
design.matrix$sex2[i] <- srkw_attributes$sex[srkw_attributes$id == n2]
design.matrix$samesex[i] <- design.matrix$sex1[i] == design.matrix$sex2[i]
design.matrix$t1[i] <- srkw_sampling[n1, n1]
design.matrix$t2[i] <- srkw_sampling[n2, n2]
design.matrix$t12[i] <- srkw_sampling[n1, n2]
design.matrix$tdiff[i] <- design.matrix$t1[i] + design.matrix$t2[i] - design.matrix$t12[i]
design.matrix$contact[i] <- srkw_contact[n1, n2]
design.matrix$surface[i] <- srkw_surfacing[n1, n2]
}
#assign groups, group1 - group2 is the same as group2 - group1 because network is undirected
design.matrix$group <- NA
for(i in 1:nrow(design.matrix)){
mm1 <- design.matrix$m1[i]
mm2 <- design.matrix$m2[i]
design.matrix$group[i] <- paste0(sort(c(mm1, mm2)), collapse = "")
}
design.matrix$group <- as.factor(design.matrix$group)
design.matrix$samesex <- as.factor(design.matrix$samesex)
#################################
### Modeling
#################################
### GLMADAPTIVE
# ZIP with Random Effects
# removing kinship from the zero inflated part because the hessian is uninvertible.
n_groups <- length(levels(design.matrix$group) )
#levels(design.matrix$group) <- LETTERS[1:10]
#design.matrix$group
mod_zir <-
GLMMadaptive::mixed_model(
fixed = contact ~ agediff * samesex+ log(tdiff + 1),
random = ~ 1| as.factor(group),
data = design.matrix,
family = GLMMadaptive::zi.poisson(),
zi_fixed = contact ~ agediff * samesex + log(tdiff + 1) ,
zi_random = ~ 1 | as.factor(group),
max_coef_value = 10000)
model_results[[p]] <- mod_zir
#######################################
### Running the simulations
#######################################
NSIM = 10000
# 3. ZIP with Random Effects 1
simulation_results[[p]] <- sim_results(mod_zir , design.matrix, NSIM)
mallow_results[[p]] <- mallow_test(simulation_results[[p]])
}
lapply(model_results, summary)
## [[1]]
##
## Call:
## GLMMadaptive::mixed_model(fixed = contact ~ agediff * samesex +
## log(tdiff + 1), random = ~1 | as.factor(group), data = design.matrix,
## family = GLMMadaptive::zi.poisson(), zi_fixed = contact ~
## agediff * samesex + log(tdiff + 1), zi_random = ~1 |
## as.factor(group), max_coef_value = 10000)
##
## Data Descriptives:
## Number of Observations: 231
## Number of Groups: 15
##
## Model:
## family: zero-inflated poisson
## link: log
##
## Fit statistics:
## log.Lik AIC BIC
## -429.6098 885.2196 894.4243
##
## Random effects covariance matrix:
## StdDev Corr
## (Intercept) 1.0916
## zi_(Intercept) 1.3894 -0.9775
##
## Fixed effects:
## Estimate Std.Err z-value p-value
## (Intercept) -13.0949 1.1029 -11.8736 < 1e-04
## agediff -0.0084 0.0102 -0.8191 0.41272
## samesexTRUE 0.0390 0.1239 0.3147 0.75297
## log(tdiff + 1) 1.8593 0.1285 14.4696 < 1e-04
## agediff:samesexTRUE 0.0045 0.0123 0.3617 0.71758
##
## Zero-part coefficients:
## Estimate Std.Err z-value p-value
## (Intercept) 8.6022 3.2536 2.6439 0.0081968
## agediff 0.0270 0.0322 0.8408 0.4004673
## samesexTRUE -3.6948 1.1537 -3.2025 0.0013624
## log(tdiff + 1) -1.1204 0.4050 -2.7664 0.0056684
## agediff:samesexTRUE 0.1338 0.0615 2.1764 0.0295235
##
## Integration:
## method: adaptive Gauss-Hermite quadrature rule
## quadrature points: 11
##
## Optimization:
## method: hybrid EM and quasi-Newton
## converged: TRUE
##
## [[2]]
##
## Call:
## GLMMadaptive::mixed_model(fixed = contact ~ agediff * samesex +
## log(tdiff + 1), random = ~1 | as.factor(group), data = design.matrix,
## family = GLMMadaptive::zi.poisson(), zi_fixed = contact ~
## agediff * samesex + log(tdiff + 1), zi_random = ~1 |
## as.factor(group), max_coef_value = 10000)
##
## Data Descriptives:
## Number of Observations: 231
## Number of Groups: 15
##
## Model:
## family: zero-inflated poisson
## link: log
##
## Fit statistics:
## log.Lik AIC BIC
## -429.6903 885.3806 894.5852
##
## Random effects covariance matrix:
## StdDev Corr
## (Intercept) 0.7153
## zi_(Intercept) 1.4424 -0.8967
##
## Fixed effects:
## Estimate Std.Err z-value p-value
## (Intercept) -9.8698 0.9070 -10.8821 < 1e-04
## agediff -0.0134 0.0098 -1.3685 0.17116
## samesexTRUE -0.0004 0.1246 -0.0034 0.99728
## log(tdiff + 1) 1.4458 0.1058 13.6697 < 1e-04
## agediff:samesexTRUE 0.0165 0.0115 1.4312 0.15238
##
## Zero-part coefficients:
## Estimate Std.Err z-value p-value
## (Intercept) 6.8852 2.9266 2.3527 0.01863913
## agediff 0.0362 0.0333 1.0869 0.27709119
## samesexTRUE -3.4386 0.9191 -3.7415 0.00018295
## log(tdiff + 1) -0.8841 0.3604 -2.4530 0.01416641
## agediff:samesexTRUE 0.1267 0.0553 2.2921 0.02190213
##
## Integration:
## method: adaptive Gauss-Hermite quadrature rule
## quadrature points: 11
##
## Optimization:
## method: hybrid EM and quasi-Newton
## converged: TRUE
##
## [[3]]
##
## Call:
## GLMMadaptive::mixed_model(fixed = contact ~ agediff * samesex +
## log(tdiff + 1), random = ~1 | as.factor(group), data = design.matrix,
## family = GLMMadaptive::zi.poisson(), zi_fixed = contact ~
## agediff * samesex + log(tdiff + 1), zi_random = ~1 |
## as.factor(group), max_coef_value = 10000)
##
## Data Descriptives:
## Number of Observations: 231
## Number of Groups: 6
##
## Model:
## family: zero-inflated poisson
## link: log
##
## Fit statistics:
## log.Lik AIC BIC
## -458.1494 942.2988 939.5917
##
## Random effects covariance matrix:
## StdDev Corr
## (Intercept) 1.4581
## zi_(Intercept) 2.0515 -0.9920
##
## Fixed effects:
## Estimate Std.Err z-value p-value
## (Intercept) -15.2689 1.3396 -11.3980 < 1e-04
## agediff -0.0010 0.0093 -0.1105 0.91204
## samesexTRUE 0.0434 0.1220 0.3557 0.72203
## log(tdiff + 1) 2.1691 0.1271 17.0616 < 1e-04
## agediff:samesexTRUE -0.0183 0.0123 -1.4944 0.13507
##
## Zero-part coefficients:
## Estimate Std.Err z-value p-value
## (Intercept) 10.8132 3.8635 2.7988 0.0051295
## agediff 0.0305 0.0311 0.9801 0.3270283
## samesexTRUE -3.2808 1.0744 -3.0535 0.0022622
## log(tdiff + 1) -1.4290 0.4778 -2.9907 0.0027835
## agediff:samesexTRUE 0.1150 0.0596 1.9297 0.0536413
##
## Integration:
## method: adaptive Gauss-Hermite quadrature rule
## quadrature points: 11
##
## Optimization:
## method: hybrid EM and quasi-Newton
## converged: TRUE
##
## [[4]]
##
## Call:
## GLMMadaptive::mixed_model(fixed = contact ~ agediff * samesex +
## log(tdiff + 1), random = ~1 | as.factor(group), data = design.matrix,
## family = GLMMadaptive::zi.poisson(), zi_fixed = contact ~
## agediff * samesex + log(tdiff + 1), zi_random = ~1 |
## as.factor(group), max_coef_value = 10000)
##
## Data Descriptives:
## Number of Observations: 231
## Number of Groups: 10
##
## Model:
## family: zero-inflated poisson
## link: log
##
## Fit statistics:
## log.Lik AIC BIC
## -460.3485 946.697 950.6306
##
## Random effects covariance matrix:
## StdDev Corr
## (Intercept) 1.2359
## zi_(Intercept) 2.3336 -0.7665
##
## Fixed effects:
## Estimate Std.Err z-value p-value
## (Intercept) -15.2850 1.1108 -13.7607 < 1e-04
## agediff -0.0017 0.0090 -0.1852 0.85308
## samesexTRUE -0.0057 0.1250 -0.0455 0.96371
## log(tdiff + 1) 2.2022 0.1274 17.2819 < 1e-04
## agediff:samesexTRUE -0.0186 0.0117 -1.5889 0.11208
##
## Zero-part coefficients:
## Estimate Std.Err z-value p-value
## (Intercept) 11.0040 4.6820 2.3503 0.0187595
## agediff 0.0423 0.0348 1.2174 0.2234554
## samesexTRUE -3.4029 0.9370 -3.6317 0.0002816
## log(tdiff + 1) -1.5598 0.6105 -2.5548 0.0106264
## agediff:samesexTRUE 0.1288 0.0601 2.1423 0.0321667
##
## Integration:
## method: adaptive Gauss-Hermite quadrature rule
## quadrature points: 11
##
## Optimization:
## method: hybrid EM and quasi-Newton
## converged: TRUE
dist_plots <- plot_ecdf(simulation_results)
egg::ggarrange(plots = dist_plots$eigen,
nrow = 1, ncol = 4)
egg::ggarrange(plots = dist_plots$close,
nrow = 1, ncol = 4)
wass_plots <- plot_wass_density_contact(mallow_results )
wass_plots[[1]]
wass_plots[[2]]